# coding=utf-8
"""
    @project: 15python_spider
    @Author：frank
    @file： 02_maoyan_film_csv.py
    @date：2023/12/26 20:32
"""

import random
import time
from lxml import etree
import requests

from day02.utils.UserAgent import user_agent_list


class MaoyanSpider(object):
    """Scrape the Maoyan Top-100 movie board and print each movie's info."""

    def __init__(self):
        # Board URL; pagination is driven by the ``offset`` query parameter.
        self.url = 'https://www.maoyan.com/board/4?offset={}'
        # Pool of User-Agent strings to rotate through per request.
        self.ua_list = user_agent_list

    # Fetch data
    def get_page(self, url):
        """Fetch *url* with a random User-Agent and hand the HTML to the parser.

        A timeout is set so a hung connection cannot block the crawl forever.
        """
        headers = {
            # Use a random User-Agent on every request to reduce blocking risk.
            'User-Agent': random.choice(self.ua_list)
        }
        req = requests.get(url=url, headers=headers, timeout=10)
        req.encoding = 'utf-8'
        self.parse_page(req.text)

    def parse_page(self, html_content):
        """Extract name / star / release time for each movie and print it."""
        # Build the parse tree.
        parse_html = etree.HTML(html_content)
        # 1. Base XPath: one <dd> node per movie entry on the board page.
        dd_list = parse_html.xpath('//dl[@class="board-wrapper"]/dd')
        # 2. Walk each node and pull the fields out of it.
        for dd in dd_list:
            name = dd.xpath('./a/@title')
            star = dd.xpath('.//p[@class="star"]/text()')
            release = dd.xpath('.//p[@class="releasetime"]/text()')
            # Skip malformed entries (e.g. anti-bot placeholder pages)
            # instead of raising IndexError on an empty xpath result.
            if not (name and star and release):
                continue
            # Fresh dict per movie: the original reused one mutable dict
            # across iterations, which breaks as soon as results are kept.
            movie_dict = {
                'name': name[0].strip(),
                'star': star[0].strip(),
                'time': release[0].strip(),
            }
            print(movie_dict)

    def main(self):
        """Crawl all 10 pages (offset 0, 10, ..., 90) with a polite delay."""
        for i in range(10):
            url = self.url.format(i * 10)
            self.get_page(url)
            # Random pause between pages to mimic human pacing
            # and avoid triggering rate limiting.
            time.sleep(random.randint(1, 3))


if __name__ == "__main__":
    # Time the full crawl and report elapsed seconds.
    t0 = time.time()
    MaoyanSpider().main()
    elapsed = time.time() - t0
    print('执行时间:%.2f' % elapsed)
